#' Standardise school names in `school.gps`
#'
#' Applies a fixed, ordered sequence of text substitutions to
#' `gps_data_merged$school.gps` so that spellings of the same school from
#' different years collapse to one form: diacritics and typographic quotes are
#' replaced by ASCII, the text is upper-cased, spacing is normalised, and a
#' list of town-name and school-name abbreviations is expanded.
#'
#' The substitutions are order-dependent (later rules see the output of earlier
#' ones), so the rule tables below must not be sorted or de-duplicated.
#'
#' @param gps_data_merged A data frame with a `school.gps` column holding the
#'   school names and a `judet.gps` column (used only for the MEHEDINTI
#'   special case). Missing values in `judet.gps` are allowed.
#' @return `gps_data_merged` with `school.gps` replaced by the cleaned names.
#'   A column named `school.gps_new`, if present, is dropped (earlier versions
#'   of this function used that name as scratch space and removed it).
clean_school <- function(gps_data_merged) {
  # Run gsub() once per (pattern, replacement) pair, strictly in the order
  # given. `pairs` is a flat vector: pattern 1, replacement 1, pattern 2, ...
  substitute_in_order <- function(x, pairs) {
    stopifnot(length(pairs) %% 2L == 0L)
    rules <- matrix(pairs, ncol = 2L, byrow = TRUE)
    for (i in seq_len(nrow(rules))) {
      x <- gsub(rules[i, 1L], rules[i, 2L], x)
    }
    x
  }

  # Work on a plain vector; the data frame is only touched again at the end.
  x <- gps_data_merged$school.gps

  # ---- Diacritics and typographic quotes -> ASCII --------------------------
  # NOTE(review): the two "ö" rules look identical here; one may be a
  # decomposed form (o + combining diaeresis) - both are kept on purpose.
  # NOTE(review): lower-case "î", "í", "ú" are not in this list, so they are
  # not transliterated - confirm whether the data can contain them.
  x <- substitute_in_order(x, c(
    # 2017
    "á", "A",
    "ș", "S",
    "ț", "T",
    "é", "E",
    "„", "\"",
    "ó", "O",
    "ö", "O",
    "ö", "O",
    "ő", "O",
    "ü", "u",
    "’’", "\"",
    "’", "\"",
    ",,", "\"",
    # 2016
    "“", "\"",
    "”", "\"",
    "Á", "A",
    "Ü", "U",
    "ă", "A",
    "Ă", "A",
    "â", "A",
    "Â", "A",
    "É", "E",
    "Î", "I",
    "Ó", "O",
    "Ö", "O",
    "Ő", "O",
    "Ș", "S",
    "ş", "S",
    "Ş", "S",
    "Ț", "T",
    "ţ", "T",
    "Ţ", "T",
    # 2014
    "I. C. PETRESCU: STALPENI", "I. C. PETRESCU\", STALPENI",
    # 2013
    "‘ ", "\""
  ))

  # ---- Case and spacing ----------------------------------------------------
  x <- toupper(x)
  # Put a space after a full stop that is glued to the next letter ("I.L" ->
  # "I. L") so the abbreviation rules further down can match.
  x <- gsub("\\.([A-Za-z])", "\\. \\1", x)
  # Remove whitespace on either side of any dash character.
  x <- gsub("\\s+(?=\\p{Pd})|(?<=\\p{Pd})\\s+", "", x, perl = TRUE)
  # Trim the ends and collapse internal runs of whitespace.
  x <- gsub("\\s+", " ", trimws(x))

  # ---- Remaining ASCII quote variants -> " ---------------------------------
  x <- substitute_in_order(x, c(
    "''-", "\" ",
    "\"-", "\" ",
    "''", "\"",
    "'", "\""
  ))

  # ---- Town names: expand abbreviations, old "I" spellings -> "A" ----------
  x <- substitute_in_order(x, c(
    "JIU\\.", "JIU,",
    "RM\\.", "RAMNICU",
    "TG\\.", "TARGU",
    "TG", "TARGU",
    "TIRG", "TARG",
    "SFINT", "SFANT",
    "SINGEORGIU", "SANGEORGIU",
    "SINMARTIN", "SANMARTIN",
    "SINTANA", "SANTANA",
    "RIMNICU", "RAMNICU",
    "PINCOTA", "PANCOTA",
    "CIMPINA", "CAMPINA",
    "RISNOV", "RASNOV",
    "CIMPIA", "CAMPIA",
    "HIRSOVA", "HARSOVA",
    "CIMPULUNG", "CAMPULUNG",
    "PIATRA NEAMT", "PIATRA-NEAMT",
    "TARGU JIU", "TARGU-JIU",
    "FIERBINTI-TARG", "FIERBINTI",
    "HIRLAU", "HARLAU",
    "TIRNAVENI", "TARNAVENI",
    "CURTEA DE AG\\.", "CURTEA DE ARGES",
    "CURTEA DE AG", "CURTEA DE ARGES"
  ))

  # ---- School-name abbreviations and one-off corrections -------------------
  # These make names comparable across years. Repeated rules (TG, RM., C-TIN,
  # SINTANA) are kept: an earlier rule in this table can create new matches.
  # NOTE(review): the " GH\\." rule also consumes the space before "GH.", so
  # "X GH. Y" becomes "XGHEORGHE Y" - left as is, confirm whether intended.
  x <- substitute_in_order(x, c(
    "GR\\. SC\\.", "GRUPUL SCOLAR",
    "GRUP SCOLAR", "GRUPUL SCOLAR",
    "LIC\\.", "LICEUL",
    "TEHN\\.", "TEHNOLOGIC",
    "GRUP SC\\.", "GRUPUL SCOLAR",
    "AL\\. I\\.", "ALEXANDRU IOAN",
    "A\\. I\\.", "ALEXANDRU IOAN",
    "GH\\. M\\.", "GHEORGHE MUNTEANU",
    " GH\\.", "GHEORGHE",
    "BARTOK BELA", "BELA BARTOK",
    "C\\. BREDICEANU", "CORIOLAN BREDICEANU",
    "C\\. DIACONOVICI", "CONSTANTIN DIACONOVICI",
    "C\\. NEGRI", "COSTACHE NEGRI",
    "D\\. CANTEMIR", "DIMITRIE CANTEMIR",
    "G\\. VRANCEANU", "GHEORGHE VRANCEANU",
    "GR\\.", "GRIGORE",
    "M\\. EMINESCU", "MIHAI EMINESCU",
    "V\\. ALECSANDRI", "VASILE ALECSANDRI",
    "G\\. APOSTU", "GEORGE APOSTU",
    "A\\. SALIGNY", "ANGHEL SALIGNY",
    "C\\. D\\. NENITESCU", "COSTIN D\\. NENITESCU",
    "ED\\. NICOLAU", "EDMOND NICOLAU",
    "G-RAL", "GENERAL",
    "C-TIN", "CONSTANTIN",
    "D\\. PRAPORGESCU", "DAVID PRAPORGESCU",
    "AL\\.", "ALEXANDRU",
    "G\\. P\\.", "GEORGE POP",
    "I\\. ZOSSIMA", "IORDACHE ZOSSIMA",
    "TG\\.", "TARGU",
    "TG", "TARGU",
    "J\\. LEBEL", "JOHANNES LEBEL",
    "J\\. M\\. ELIAS", "JACQUES M\\. ELIAS",
    "C-TIN", "CONSTANTIN",
    "M\\. BASARAB", "MATEI BASARAB",
    "N\\. ONCESCU", "NICOLAE ONCESCU",
    "SF\\.", "SFANTUL",
    "T\\. VLADIMIRESCU", "TUDOR VLADIMIRESCU",
    "RM\\.", "RAMNICU",
    "I\\. L\\.", "ION LUCA",
    "I\\. C\\. DRAGUSANU", "ION CODRU DRAGUSANU",
    "J GREGOR TAJOVSKI", "JOZEF GREGOR TAJOVSKI",
    "S\\. HARET", "SPIRU HARET",
    "SF ", "SFANTU ",
    " TIMIS$", "",
    "C\\. BRANCUSI", "CONSTANTIN BRANCUSI",
    "A\\. IANCU", "AVRAM IANCU",
    "J\\. KOZACEK", "JOZEF KOZACEK",
    "I\\. VULCAN", "IOSIF VULCAN",
    # The optional trailing L stops "M. VITEAZUL" becoming "MIHAI VITEAZULL".
    "M\\. VITEAZUL?", "MIHAI VITEAZUL",
    "S\\. VULCAN", "SAMUIL VULCAN",
    "A\\. SAGUNA", "ANDREI SAGUNA",
    "T\\. VUIA", "TRAIAN VUIA",
    "V\\. VOICULESCU", "VASILE VOICULESCU",
    "L\\. BLAGA", "LUCIAN BLAGA",
    "M\\. KOGALNICEANU", "MIHAIL KOGALNICEANU",
    "N\\. BOLCAS", "NICOLAE BOLCAS",
    "N\\. JIGA", "NICOLAE JIGA",
    "P\\. COSMA", "PARTENIE COSMA",
    "D\\. LEONIDA", "DIMITRIE LEONIDA",
    "O\\. GHIBU", "ONISIFOR GHIBU",
    "A\\. ROMAN", "ALEXANDRU ROMAN",
    "GHEORGHE MURGOCI", "GHEORGHE MUNTEANU MURGOCI",
    "N\\. COMANECI", "NADIA COMANECI",
    "CU PROGRAM SPORTIV", "SPORTIV",
    "CU PROGRAM DE ATLETISM", "SPORTIV",
    "\"DIMITRIE TICHINDEAL\"", "\"PREPARANDIA-DIMITRIE TICHINDEAL\"",
    "N\\. VASILESCU", "NICOLAE VASILESCU",
    "IND\\.", "INDUSTRIAL",
    "N PLESOIANU", "NICOLAE PLESOIANU",
    "C\\. ANGELESCU", "CONSTANTIN ANGELESCU",
    "DOBRESCU-ARGES", "DOBRESCU",
    "SZENT ERZSEBET", "SFANTA ELISABETA",
    "SAT CIORANII DE JOS\\. COMUNA CIORANI", "CIORANI",
    "CIORANII DE JOS", "CIORANI",
    "MANECIU-UNGURENI", "MANECIU",
    "SAT GHEABA\\. COMUNA MANECIU", "MANECIU",
    "ION I\\. C\\. BRATIANU", "ION CONSTANTIN BRATIANU",
    "I\\. C\\. BRATIANU", "ION CONSTANTIN BRATIANU",
    "ION C\\. BRATIANU", "ION CONSTANTIN BRATIANU",
    "TEHNLOGIC", "TEHNOLOGIC",
    "SINTANA", "SANTANA",
    "BOLINTIN VALE", "BOLINTIN-VALE"
  ))

  # ---- Drop the village ("SAT ...") and keep the commune -------------------
  # For each name, everything from the first "SAT " to the end is replaced by
  # that same name with the text preceding "COMUNA" stripped. The replacement
  # differs per element, hence the element-wise vapply(); it also copes with
  # zero-length input, which 1:length(x) did not.
  x <- vapply(
    x,
    function(name) {
      from_comuna <- gsub(".+?(?=COMUNA)", "", name, perl = TRUE)
      gsub("(SAT ).*", from_comuna, name, perl = TRUE)
    },
    character(1),
    USE.NAMES = FALSE
  )

  # ---- Administrative prefixes and school numbers --------------------------
  x <- substitute_in_order(x, c(
    "MUN\\.", "",
    "INCLUZIA", "INCLUZIVA",
    "COLEGIU ", "COLEGIUL ",
    "LOCALITATEA ", "",
    "COMUNA ", "",
    "COMUNA ", "",
    "NR\\.", "NR",
    "NUMARUL", "NR"
  ))
  # Separate "NR" from a digit that follows it directly ("NR1" -> "NR 1").
  x <- gsub("NR(?=[0-9])", "NR ", x, perl = TRUE)
  x <- substitute_in_order(x, c(
    "GALAT$", "GALATI",
    "TARGU-MURES", "TARGU MURES"
  ))

  # ---- County-specific rename ----------------------------------------------
  # which() drops NA comparisons, so rows with a missing judet.gps are simply
  # left alone instead of aborting the assignment.
  in_mehedinti <- which(gps_data_merged$judet.gps == "MEHEDINTI")
  x[in_mehedinti] <- gsub(
    "COLEGIUL TEHNOLOGIC$",
    "COLEGIUL TEHNIC DE TRANSPORTURI AUTO",
    x[in_mehedinti]
  )

  # ---- Spacing around the quoted part of the name --------------------------
  # Add a space before the first quotation mark.
  x <- sub('(.*?)"', '\\1 "', x)
  # Add a space after a closing quotation mark that is not followed by a comma.
  x <- sub('(\\".*?)"(?!\\,)', '\\1" ', x, perl = TRUE)
  # Remove whitespace directly after an opening quotation mark.
  x <- gsub('(?<=\\s\")(\\s)(.*\")', '\\2', x, perl = TRUE)
  # Collapse the double spaces introduced above and trim the ends.
  x <- trimws(gsub("\\s+", " ", x))

  gps_data_merged$school.gps <- x
  # Keep the historical output shape: no `school.gps_new` column is returned.
  gps_data_merged$school.gps_new <- NULL

  gps_data_merged
}